# -*- coding: utf-8 -*-
"""
PPO 训练曲线输出 PDF（可编辑文字）版本
使用以下真实 tag：
- Policy Loss: train/loss/policy_avg
- RLHF Reward: train/objective/rlhf_reward
"""

import os
import glob
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

# ============================================================
# Global parameters
# ============================================================
EMA_ALPHA = 0.2
FIG_SIZE = (10, 5)
DPI = 300
FONT_SIZE = 18

# ============================================================
# Matplotlib configuration
# fonttype 42 keeps the text in exported PDF/PS files editable (Type42)
# ============================================================
matplotlib.rcParams.update({
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "font.family": "Arial",  # font that stays editable in AI/Inkscape
    "font.size": FONT_SIZE,
    "axes.labelsize": FONT_SIZE,
    "axes.titlesize": FONT_SIZE + 4,
    "legend.fontsize": FONT_SIZE - 2,
})


# ============================================================
# 工具函数
# ============================================================
def ema_smooth(values, alpha=0.2):
    """Return the exponential moving average of *values* as a list.

    The first output equals the first input; every following output is
    ``alpha * value + (1 - alpha) * previous_output``.

    Args:
        values: Any iterable of numbers (list, tuple, generator, numpy
            array, ...). It is consumed once.
        alpha: Weight given to the newest value.

    Returns:
        A list with one smoothed value per input value; ``[]`` for empty
        input.
    """
    # Materialise first: truth-testing or indexing the raw argument fails
    # for generators and for multi-element numpy arrays.
    values = list(values)
    if not values:
        return []

    out = [values[0]]
    for v in values[1:]:
        out.append(alpha * v + (1 - alpha) * out[-1])
    return out


def prepare_series(steps, values):
    """Clean a (steps, values) series before plotting.

    Rows containing NaN are dropped, values sharing the same step are
    averaged, and the result is ordered by ascending step.

    Returns:
        A ``(steps, values)`` pair of plain Python lists.
    """
    frame = pd.DataFrame({"step": steps, "val": values})
    frame = frame.dropna()

    # One row per step: duplicates collapse to their mean value.
    per_step = frame.groupby("step", as_index=False)["val"].mean()
    per_step = per_step.sort_values("step")

    ordered_steps = per_step["step"].tolist()
    ordered_vals = per_step["val"].tolist()
    return ordered_steps, ordered_vals


# ============================================================
# 绘图
# ============================================================
def plot_pdf(steps, values, title, ylabel, save_path):
    """Plot a raw curve plus its EMA-smoothed version and save it as a PDF.

    The series is first cleaned with ``prepare_series`` and smoothed with
    ``ema_smooth`` using ``EMA_ALPHA``.

    Args:
        steps: X values (training steps) of the series.
        values: Y values logged at ``steps``.
        title: Figure title.
        ylabel: Label of the y axis.
        save_path: Destination PDF path; missing parent directories are
            created. A bare file name (no directory part) is saved in the
            current working directory.
    """
    steps, values = prepare_series(steps, values)
    smooth = ema_smooth(values, EMA_ALPHA)

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when save_path actually has one.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=FIG_SIZE)
    try:
        ax.plot(steps, values, color="gray", linewidth=1, alpha=0.6, label="Raw Curve")
        ax.plot(steps, smooth, color="black", linewidth=2.2, label="Smoothed")

        ax.set_xlabel("Training Step")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()

        fig.savefig(save_path, dpi=DPI, format="pdf")
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise it stays registered in pyplot.
        plt.close(fig)


# ============================================================
# 读取 TensorBoard
# ============================================================
def read_tb(path):
    """Read the policy-loss and RLHF-reward scalars from a TensorBoard log.

    Args:
        path: Either an event file, or a directory containing
            ``events.out.tfevents*`` files. For a directory, the file that
            sorts last by name is used.

    Returns:
        ``((loss_steps, loss_vals), (reward_steps, reward_vals))``. A series
        is ``([], [])`` when no event file is found or when its tag is not
        present in the log.
    """
    # Scalar tags as they are written by the training run.
    policy_tag = "train/loss/policy_avg"
    reward_tag = "train/objective/rlhf_reward"

    def latest(location):
        """Resolve *location* to a single event file (or None)."""
        if os.path.isdir(location):
            candidates = sorted(
                glob.glob(os.path.join(location, "events.out.tfevents*"))
            )
            return candidates[-1] if candidates else None
        return location

    event_file = latest(path)
    if not event_file:
        return ([], []), ([], [])

    ea = event_accumulator.EventAccumulator(
        event_file, size_guidance={event_accumulator.SCALARS: 0}
    )
    ea.Reload()

    available = set(ea.Tags().get(event_accumulator.SCALARS, []))

    def scalar_series(tag):
        """Return (steps, values) for *tag*, or empty lists if it is absent."""
        # ea.Scalars raises KeyError for an unknown tag; callers already
        # treat an empty series as "nothing to plot".
        if tag not in available:
            print(f"[warn] scalar tag '{tag}' not found in {event_file}")
            return [], []
        events = ea.Scalars(tag)
        return [e.step for e in events], [float(e.value) for e in events]

    return scalar_series(policy_tag), scalar_series(reward_tag)


# ============================================================
# 主流程
# ============================================================
def process(name, path, out_dir):
    """Read one run's TensorBoard log and write its curve PDFs to *out_dir*.

    A PDF is produced for the policy loss and for the RLHF reward; a curve
    with no values is skipped.

    Args:
        name: Run name, used in the figure titles and output file names.
        path: TensorBoard log location passed to ``read_tb``.
        out_dir: Directory receiving the PDF files.
    """
    loss_series, reward_series = read_tb(path)

    # (axis label, file-name suffix, (steps, values)) for each curve.
    curves = (
        ("Policy Loss", "policy_loss", loss_series),
        ("RLHF Reward", "rlhf_reward", reward_series),
    )

    for label, suffix, (curve_steps, curve_vals) in curves:
        if not curve_vals:
            continue
        plot_pdf(
            curve_steps,
            curve_vals,
            f"{name} — {label} (Raw + EMA)",
            label,
            os.path.join(out_dir, f"{name}_{suffix}.pdf"),
        )


# ============================================================
# 执行入口
# ============================================================
if __name__ == "__main__":

    # Directory that receives every generated PDF.
    out_dir = r"C:\Users\Lenovo\Desktop\000\tensorboard\出图"
    os.makedirs(out_dir, exist_ok=True)

    # Run name -> TensorBoard log directory.
    inputs = {
        "PPO_Expert": r"C:\Users\Lenovo\Desktop\000\tensorboard\E_PPO",
        "PPO_Public": r"C:\Users\Lenovo\Desktop\000\tensorboard\P_PPO",
        "PPO_Fusion": r"C:\Users\Lenovo\Desktop\000\tensorboard\H_PPO",
    }

    for run_name, log_dir in inputs.items():
        print(f"==> Processing: {run_name}")
        process(run_name, log_dir, out_dir)

    print("\n🎉 所有 PPO PDF 训练曲线已生成（文字完全可编辑）！")
